探討地震、海洋之間的關係

Big Data Analytical Method Final Project

  • B0544255 許懿傑

Dev. Environment & Software

  • MacOS 10.13.6 High Sierra
  • RStudio v1.1.463
  • MacOS 10.14.5 Mojave
  • RStudio v1.2.1335

FAQ

Sli.do_room

Sli.do_room


Library Import

library(xml2)
library(jsonlite)
library(dplyr)
library(ggplot2)
library(tidyverse)
library(purrr)
library(data.table)
library(plotly)
library(lubridate) # time formatting
library(zoo) # time formatting

Data Import

typhoon_all <- read.csv("https://raw.githubusercontent.com/jason19970210/BigDataAnalyticalMethods/master/Final/Data/typhoon/typhoon_web_table.csv",stringsAsFactors = F)
eq_xml_url_base <- paste("https://raw.githubusercontent.com/jason19970210/BigDataAnalyticalMethods/master/Final/Data/earthquake/CWB-EQ-Catalog-%d","xml",sep = ".")
sea_level <- fromJSON("https://opendata.cwb.gov.tw/fileapi/v1/opendataapi/C-B0048-001?Authorization=CWB-64CBB768-EE64-4FD2-AED5-0A68D1A48B79&downloadType=WEB&format=JSON")
seatemp <- fromJSON("https://opendata.cwb.gov.tw/fileapi/v1/opendataapi/C-B0050-001?Authorization=CWB-64CBB768-EE64-4FD2-AED5-0A68D1A48B79&downloadType=WEB&format=JSON")

Data 1st Processing (Make Data Frame)

Earthquake data
map_df(1990:2018, function(i){
  xml_url <- read_xml(sprintf(eq_xml_url_base,i))
  eqinfo <- xml_find_all(xml_url, ".//earthquakeinfo")
  
  #tibble::tibble
  data.frame(originTime = xml_text(xml_find_first(eqinfo, ".//originTime")),
             epicenterLon = xml_text(xml_find_first(eqinfo, ".//epicenterLon")),
             epicenterLat = xml_text(xml_find_first(eqinfo, ".//epicenterLat")),
             depth = xml_text(xml_find_first(eqinfo, ".//depth")),
             magnitudeValue = xml_text(xml_find_first(eqinfo, ".//magnitudeValue")),
             gap = xml_text(xml_find_first(eqinfo, "./gap")),
             rms = xml_text(xml_find_first(eqinfo, "./rms")),
             erh = xml_text(xml_find_first(eqinfo, "./erh")),
             erz = xml_text(xml_find_first(eqinfo, "./erz")),
             quality = xml_text(xml_find_first(eqinfo, "./quality")),
             
             stringsAsFactors = FALSE   #doesnt have this parameter in tibble
  ) #end of data.frame
}) -> eq_df
Sea Level Data
sea_level <- sea_level$Cwbopendata$dataset$location

sea_level <- data.table(sea_level)
sea_level[, number := 1:nrow(sea_level)]
sea_level[, yyyymm := ItemValue[[.N]][[1]], by = number]
sea_level[, MeanSeaLevel := ItemValue[[.N]][2], by = number]
sea_level[, MeanSeaLevel_unit := ItemUnit[[.N]][1], by = number]
sea_level[, HHWL := ItemValue[[.N]][3], by = number]
sea_level[, HHWL_unit := ItemUnit[[.N]][2], by = number]
sea_level[, LLWL := ItemValue[[.N]][4], by = number]
sea_level[, LLWL_unit := ItemUnit[[.N]][3], by = number]

sea_level <- select(sea_level, SiteName, SiteId, yyyymm, MeanSeaLevel, MeanSeaLevel_unit, HHWL, HHWL_unit, LLWL, LLWL_unit)
Sea Temperture Data
seatemp <- seatemp$Cwbopendata$dataset$location

sea_test2 <- data.frame(obsrtime = NA,
                        MonthAvg = NA,
                        MonthHigh = NA,
                        MonthHighTime = NA,
                       MonthLow = NA,
                       MonthLowTime = NA,
                       LocationName = NA,
                       StationID = NA,
                       lon = NA,
                       lat = NA)


for (j in 1:nrow(seatemp)){
  
  sea_test <- data.frame(obsrtime = NA,
                         MonthAvg = NA,
                         MonthHigh = NA,
                         MonthHighTime = NA,
                         MonthLow = NA,
                         MonthLowTime = NA)
  
  for (i in 1:length(seatemp$time[[j]]$obsrtime)) {
    sea_temp  <- c(seatemp$time[[j]]$obsrtime[i], seatemp$time[[j]]$weatherElement$elementValue[[i]]$vlaue)
    sea_test <- rbind(sea_test, sea_temp)
  }
  sea_test <- sea_test[-1,]
  sea_test$LocationName <- seatemp$LocationName[j]
  sea_test$StationID <- seatemp$StationID[j]
  sea_test$lon <- seatemp$lon[j]
  sea_test$lat <- seatemp$lat[j]
  
  sea_test2 <- rbind(sea_test2, sea_test)
}

sea_test2 <- sea_test2[-1,]
seatemp <- select(sea_test2, 
                    LocationName, 
                    StationID, 
                    lon,
                    lat, 
                    obsrtime, 
                    MonthAvg, 
                    MonthHigh, 
                    MonthHighTime, 
                    MonthLow, 
                    MonthLowTime)

Data 2nd Processing

Re-Typing & Filter
eq_df
eq_df$originTime <- as.Date(eq_df$originTime)
eq_df$epicenterLon <- as.numeric(eq_df$epicenterLon)
eq_df$epicenterLat <- as.numeric(eq_df$epicenterLat)
eq_df$depth <- as.numeric(eq_df$depth)
eq_df$magnitudeValue <- as.numeric(eq_df$magnitudeValue)
eq_df$gap <- as.numeric(eq_df$gap)
eq_df$rms <- as.numeric(eq_df$rms)
eq_df$erh <- as.numeric(eq_df$erh)
eq_df$erz <- as.numeric(eq_df$erz)
# Without NA error message
sea_level
sea_level$year <- as.numeric(substring(sea_level$yyyymm, 1,4))
sea_level$month <- as.numeric(substring(sea_level$yyyymm, 5,6))
sea_level$MeanSeaLevel <- as.numeric(sea_level$MeanSeaLevel)
sea_level$HHWL <- as.numeric(sea_level$HHWL)
sea_level$LLWL <- as.numeric(sea_level$LLWL)
# Without NA error message

sea_level <- sea_level %>% filter(.$month != 0) %>% filter(.$year <= 2017 & .$year >= 2012)
seatemp
seatemp$lon <- as.numeric(seatemp$lon)
seatemp$lat <- as.numeric(seatemp$lat)
seatemp$obsrtime_year <- as.numeric(substring(seatemp$obsrtime, 1,4))
seatemp$MonthAvg <- as.numeric(seatemp$MonthAvg)
seatemp$MonthHigh <- as.numeric(seatemp$MonthHigh)
seatemp$MonthLow <- as.numeric(seatemp$MonthLow)
# Without NA error message

New df

sub_level <- sea_level %>% filter(.$SiteName %in% c("高雄市 高雄","連江縣 馬祖","基隆市 基隆","臺中市 臺中港","臺東縣 蘭嶼")) %>% as.data.table()
sub_temp <- seatemp %>% filter(.$LocationName %in% c("高雄","馬祖","基隆","臺中港","蘭嶼"))
data.table(sub_level)
##         SiteName SiteId yyyymm MeanSeaLevel MeanSeaLevel_unit  HHWL
##   1: 臺東縣 蘭嶼   1396 201201        -67.9                cm    NA
##   2: 臺東縣 蘭嶼   1396 201202        -52.6                cm    NA
##   3: 臺東縣 蘭嶼   1396 201203        -45.0                cm    NA
##   4: 臺東縣 蘭嶼   1396 201204        -47.1                cm    NA
##   5: 臺東縣 蘭嶼   1396 201205        -37.8                cm    NA
##  ---                                                               
## 344: 連江縣 馬祖   1926 201707       -111.9                cm 199.6
## 345: 連江縣 馬祖   1926 201708       -105.8                cm 208.9
## 346: 連江縣 馬祖   1926 201709        -90.1                cm 200.1
## 347: 連江縣 馬祖   1926 201711        -92.2                cm 210.7
## 348: 連江縣 馬祖   1926 201712       -104.9                cm 214.7
##      HHWL_unit   LLWL LLWL_unit year month
##   1:        cm     NA        cm 2012     1
##   2:        cm     NA        cm 2012     2
##   3:        cm     NA        cm 2012     3
##   4:        cm     NA        cm 2012     4
##   5:        cm     NA        cm 2012     5
##  ---                                      
## 344:        cm -454.1        cm 2017     7
## 345:        cm -435.5        cm 2017     8
## 346:        cm -411.4        cm 2017     9
## 347:        cm -431.9        cm 2017    11
## 348:        cm -480.3        cm 2017    12
sub_level[,LocationName := ifelse(SiteName == "高雄市 高雄", "高雄", 
                                  ifelse(SiteName == "連江縣 馬祖", "馬祖",
                                        ifelse(SiteName == "基隆市 基隆", "基隆",
                                               ifelse(SiteName == "臺中市 臺中港", "臺中港",
                                                      ifelse(SiteName == "臺東縣 蘭嶼", "蘭嶼" ,"")
                                                      )
                                               )
                                        )
                                 )
          ]

sub_level$yyyymm <- paste0(sub_level$year,"/",sub_level$month)
sub_level_temp <- left_join(sub_level,sub_temp, by = c("LocationName","yyyymm"="obsrtime"))

Including Plots

cor

cor(sub_level_temp$MeanSeaLevel, sub_level_temp$MonthAvg, use = "complete.obs")
## [1] 0.3998575
plot_ly(x = ~sub_level_temp$MonthAvg, y = ~sub_level_temp$MeanSeaLevel, type = 'scatter', mode = 'markers', color = ~sub_level_temp$LocationName)
## Warning: Ignoring 7 observations
plot_ly(x = ~sub_level_temp$lat, y = ~sub_level_temp$lon, z = ~sub_level_temp$MeanSeaLevel, type = 'scatter3d', color = ~sub_level_temp$LocationName)
## Warning: Ignoring 6 observations
# 相關係數級距:中度相關
plot_ly(x = ~sub_level_temp$lat, y = ~sub_level_temp$lon, z = ~sub_level_temp$MonthAvg, type = 'scatter3d', color = ~sub_level_temp$LocationName)
## Warning: Ignoring 7 observations
cor.test(sub_level_temp$MeanSeaLevel, sub_level_temp$MonthAvg, use = "complete.obs")
## 
##  Pearson's product-moment correlation
## 
## data:  sub_level_temp$MeanSeaLevel and sub_level_temp$MonthAvg
## t = 8.0322, df = 339, p-value = 1.598e-14
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3066752 0.4854478
## sample estimates:
##       cor 
## 0.3998575
sub_level_temp$ym <- gsub("/","-",sub_level_temp$yyyymm)
sub_level_temp$ym <- as.Date(as.yearmon(sub_level_temp$ym))
sub_level_temp %>% plot_ly(x = ~.$ym, y = ~.$MonthAvg, type = 'scatter', mode = 'lines+markers', color = ~.$LocationName)

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.